Split directories on Qumulo using file capacity aggregate data


In [122]:
import time
import re
from qumulo.rest_client import RestClient

class Path:
    """Record describing one directory considered for the split.

    Fields (all stored exactly as passed in, and freely mutable):
        level:  depth of the directory in the walk; the starting directory
                is level 1 and each level of recursion adds one.
        path:   the directory's path string.
        sz:     the directory's size, expressed by the walker as a fraction
                of the starting directory's total.
        bucket: caller-supplied bucket reference; the walker in this file
                always passes None.
    """

    def __init__(self, level, path, sz, bucket):
        # No validation or conversion: the record is a plain value holder.
        self.level, self.path = level, path
        self.sz, self.bucket = sz, bucket


class Bucket:
    """One share of the split: a set of included paths plus the nested
    paths that have to be excluded because another bucket owns them.

    Attributes:
        sz:            running total of the sizes of the included paths
                       (fractions; printed as a percentage).
        include_paths: dict mapping path string -> path object for every
                       path assigned to this bucket.
        exclude_paths: dict mapping path string -> path object for paths
                       that live underneath one of this bucket's included
                       paths but belong to a different bucket.
    """

    def __init__(self):
        self.sz = 0
        self.include_paths = {}
        self.exclude_paths = {}

    def add_path(self, path_obj):
        """Assign ``path_obj`` to this bucket and add its size to the total.

        ``path_obj`` needs a ``path`` string and a numeric ``sz``.
        """
        self.sz += path_obj.sz
        self.include_paths[path_obj.path] = path_obj

    def show_contents(self):
        """Print the bucket size, then the included and excluded paths."""
        print("# Bucket size: %.1f%%" % (self.sz*100,))
        # .items() instead of .iteritems() so this runs on Python 2 and 3.
        for path, p in self.include_paths.items():
            print(" %-90s  # %.2f%%" % (path, p.sz*100))
        for path in self.exclude_paths:
            print("-%-90s  " % (path, ))

    def add_exclusions(self, buckets):
        """Record every path owned by another bucket that lies below one of
        this bucket's included paths.

        ``buckets`` is an iterable of the *other* buckets; ``None`` entries
        are skipped (callers use ``None`` as a placeholder for this bucket).
        """
        for p in self.include_paths:
            # Compare on whole path components with a literal prefix. The
            # path is not interpreted as a regular expression, so names with
            # characters such as '+', '.', '[' or '(' are matched verbatim,
            # and "/foo" does not claim the unrelated sibling "/foobar".
            prefix = p if p.endswith("/") else p + "/"
            for b in buckets:
                if b is None:
                    continue
                for included_p, path_obj in b.include_paths.items():
                    if included_p in self.include_paths:
                        continue
                    if included_p.startswith(prefix):
                        self.exclude_paths[included_p] = path_obj


class EasySplit:
    """Split a directory tree into roughly equal buckets using the
    per-directory aggregates returned by the REST client.

    Typical use: construct, call ``walk_dir(start_path)`` to collect sizes,
    then ``process_paths()`` to distribute the collected paths over the
    buckets and print the result.
    """

    # Class-level defaults kept for backward compatibility. Every instance
    # gets its own ``paths`` dict in __init__, and ``root_data`` /
    # ``root_meta`` are set per instance by the level-1 call to walk_dir.
    root_cap = None   # not read by any method in this class
    root_data = None
    root_meta = None
    paths = {}

    def __init__(self, rest_client, split_type='capacity', bucket_count=4, min_dir_size=0.003, only_dirs=False):
        """
        rest_client:  object exposing ``fs.read_dir_aggregates(path=...)``.
        split_type:   'files' sizes directories by the aggregates' meta
                      figures; any other value uses the capacity figures.
        bucket_count: number of buckets to produce; each targets
                      ``1.0 / bucket_count`` of the total.
        min_dir_size: threshold, as a fraction of the starting directory's
                      total. Children at or below it are not descended
                      into, and paths whose remaining size is below it are
                      not listed in a bucket.
        only_dirs:    see the review note in walk_dir.
        """
        self.paths = {}
        self.rc = rest_client
        self.split_type = split_type
        self.only_dirs = only_dirs
        self.bucket_count = bucket_count
        self.bucket_sz = 1.0 / bucket_count
        self.min_dir_size = min_dir_size

    def process_paths(self):
        """Distribute the walked paths over the buckets and print each one.

        Paths are taken deepest level first (smallest first within a level)
        and placed in the current bucket, moving on to the next bucket when
        the path would push the current one past its target size. The
        level-1 path always goes in the last bucket, whose size is then
        replaced by whatever the other buckets do not account for.
        """
        paths = sorted(self.paths.values(), key=lambda k: (-k.level, k.sz))
        buckets = [Bucket() for _ in range(self.bucket_count)]
        last = self.bucket_count - 1

        bucket_id = 0
        for p in paths:
            if buckets[bucket_id].sz + p.sz > self.bucket_sz and bucket_id < last:
                bucket_id += 1
            if p.level == 1:
                buckets[last].add_path(p)
            elif p.sz >= self.min_dir_size:
                buckets[bucket_id].add_path(p)

        # The last bucket is "everything else": give it the remainder.
        buckets[last].sz = 1 - sum(d.sz for d in buckets[:-1])

        for i, bucket in enumerate(buckets):
            # Pass all buckets with this one blanked out. Distinct names are
            # used so the comprehension cannot rebind the loop variable
            # (comprehension variables leak into the enclosing scope on
            # Python 2).
            others = [other if j != i else None for j, other in enumerate(buckets)]
            bucket.add_exclusions(others)

        for i, bucket in enumerate(buckets):
            print("------------ %s -----------" % i)
            bucket.show_contents()

    def _fraction(self, value, total):
        """Return value/total as a float, or 0.0 when the total is zero."""
        # An empty tree reports a zero total; treat every share as zero
        # instead of raising ZeroDivisionError.
        if total == 0:
            return 0.0
        return float(value) / total

    def walk_dir(self, path, level=1):
        """Recursively record ``path`` and its large sub-directories.

        Each visited directory is stored in ``self.paths`` with its size as
        a fraction of the starting directory's total; the size of every
        child that is descended into is subtracted from its parent, so a
        parent's entry only covers what its recorded children do not.

        The call with ``level == 1`` defines the totals that all fractions
        are relative to. Returns ``self.paths``.
        """
        data = self.rc.fs.read_dir_aggregates(path=path)

        if level == 1:
            self.root_data = float(data['total_capacity'])
            self.root_meta = float(data['total_meta'])

        if self.split_type == 'files':
            total_key, entry_key, root_total = 'total_meta', 'meta_usage', self.root_meta
        else:
            total_key, entry_key, root_total = 'total_capacity', 'data_usage', self.root_data

        self.paths[path] = Path(level=level, path=path,
                                sz=self._fraction(data[total_key], root_total),
                                bucket=None)

        # Avoid doubling the separator for "/" or any path ending in a slash.
        sep = '' if path.endswith('/') else '/'

        for d in data['files']:
            # NOTE(review): with only_dirs=False (the constructor default)
            # this condition is never true, so nothing below the starting
            # directory is visited. Confirm whether that is intended.
            if self.only_dirs and d['type'] == 'FS_FILE_TYPE_DIRECTORY':
                child_sz = self._fraction(d[entry_key], root_total)
                if child_sz > self.min_dir_size:
                    self.paths[path].sz -= child_sz
                    self.walk_dir(path + sep + d['name'], level + 1)
        return self.paths

In [123]:
# Open a session to the cluster (second argument 8000 is presumably the
# REST port) and authenticate with the placeholder credentials.
rc = RestClient("<qumulo-cluster>", 8000)
rc.login("<qumulo-user>", "<qumulo-password>")

# Capacity-based split of "/" into four buckets, directories only, with a
# 1% size threshold.
es = EasySplit(
    rest_client=rc,
    split_type='capacity',
    bucket_count=4,
    min_dir_size=0.01,
    only_dirs=True,
)
es.walk_dir("/")
es.process_paths()


------------ 0 -----------
# Bucket size: 18.7%
 /testmatt-rep/testcopy/flame                                                                # 1.02%
 /testmatt-rep/testcopy/Elemental                                                            # 1.35%
 /Research                                                                                   # 2.32%
 /Research/Customer_Success/Conferences/Gainsight_Pulse_2016                                 # 2.25%
 /Research/Sales_Engineering/Tools/Demo_Data/Movies                                          # 1.15%
 /testmatt-rep/Elemental                                                                     # 1.35%
 /Research/Customer_Success/Customers                                                        # 1.79%
 /Research/Engineering/Hardware                                                              # 2.11%
 /testmatt-target/testcopy/flame                                                             # 1.02%
 /Research/Sales_Engineering                                                                 # 1.64%
 /testmatt-target/Elemental                                                                  # 1.35%
 /testmatt-target/testcopy/Elemental                                                         # 1.35%
------------ 1 -----------
# Bucket size: 9.2%
 /GenomeIngest                                                                               # 9.18%
------------ 2 -----------
# Bucket size: 33.6%
 /FastQ                                                                                      # 33.62%
------------ 3 -----------
# Bucket size: 38.5%
 /GenomeTemp                                                                                 # 35.09%
 /                                                                                           # 8.05%
-/testmatt-rep/testcopy/flame                                                                
-/testmatt-rep/testcopy/Elemental                                                            
-/testmatt-target/testcopy/Elemental                                                         
-/Research                                                                                   
-/Research/Customer_Success/Conferences/Gainsight_Pulse_2016                                 
-/Research/Sales_Engineering/Tools/Demo_Data/Movies                                          
-/testmatt-rep/Elemental                                                                     
-/GenomeIngest                                                                               
-/FastQ                                                                                      
-/Research/Engineering/Hardware                                                              
-/testmatt-target/testcopy/flame                                                             
-/Research/Sales_Engineering                                                                 
-/testmatt-target/Elemental                                                                  
-/Research/Customer_Success/Customers                                                        

In [119]:
# Open a session to the cluster (second argument 8000 is presumably the
# REST port) and authenticate with the placeholder credentials.
rc = RestClient("<qumulo-cluster>", 8000)
rc.login("<qumulo-user>", "<qumulo-password>")

# Capacity-based split of "/" into four buckets, directories only, with a
# 0.5% size threshold.
es = EasySplit(
    rest_client=rc,
    split_type='capacity',
    bucket_count=4,
    min_dir_size=0.005,
    only_dirs=True,
)
es.walk_dir("/")
es.process_paths()


------------ 0 -----------
# Bucket size: 0.245
0.018:  /projects/Katy-Perry-Taylor-Swift-Rihanna-Beyonce-Collab/flame/4KSTEM_ENC2
0.045:  /projects/Katy-Perry-Taylor-Swift-Rihanna-Beyonce-Collab/flame/4KSTEM_DEC
0.006:  /tv-shows/FoxNews HD/2017-07-22
0.006:  /tv-shows/FoxNews HD/2017-07-23
0.006:  /tv-shows/FoxNews HD/2017-08-14
0.006:  /tv-shows/FoxNews HD/2017-07-29
0.006:  /tv-shows/FoxNews HD/2017-08-10
0.006:  /tv-shows/FoxNews HD/2017-08-12
0.006:  /tv-shows/FoxNews HD/2017-08-13
0.006:  /tv-shows/FoxNews HD/2017-09-13
0.006:  /tv-shows/FoxNews HD/2017-07-08
0.006:  /tv-shows/FoxNews HD/2017-06-24
0.006:  /tv-shows/FoxNews HD/2017-07-02
0.006:  /tv-shows/FoxNews HD/2017-07-01
0.006:  /tv-shows/FoxNews HD/2017-07-04
0.006:  /tv-shows/FoxNews HD/2017-09-20
0.006:  /tv-shows/FoxNews HD/2017-09-25
0.005:  /projects/Katy-Perry-Taylor-Swift-Rihanna-Beyonce-Collab/pixspan
0.006:  /tv-shows/FoxNews HD/2017-07-30
0.006:  /tv-shows/FoxNews HD/2017-08-08
0.006:  /tv-shows/FoxNews HD/2017-08-20
0.018:  /projects/Katy-Perry-Taylor-Swift-Rihanna-Beyonce-Collab/flame/4KSTEM_ENC
0.006:  /tv-shows/FoxNews HD/2017-06-26
0.029:  /projects/Katy-Perry-Taylor-Swift-Rihanna-Beyonce-Collab/pixspan/media-uncompressed/4KStem-D
0.006:  /tv-shows/FoxNews HD/2017-09-03
0.006:  /tv-shows/FoxNews HD/2017-07-15
0.006:  /tv-shows/FoxNews HD/2017-07-14
0.006:  /tv-shows/FoxNews HD/2017-07-11
------------ 1 -----------
# Bucket size: 0.248
0.006:  /tv-shows/FoxNews HD/2017-08-22
0.006:  /tv-shows/FoxNews HD/2017-08-24
0.006:  /tv-shows/FoxNews HD/2017-08-27
0.006:  /tv-shows/FoxNews HD/2017-07-20
0.006:  /tv-shows/FoxNews HD/2017-07-21
0.006:  /tv-shows/FoxNews HD/2017-08-18
0.006:  /tv-shows/FoxNews HD/2017-07-25
0.006:  /tv-shows/FoxNews HD/2017-08-30
0.006:  /tv-shows/FoxNews HD/2017-08-31
0.006:  /tv-shows/FoxNews HD/2017-07-28
0.006:  /tv-shows/FoxNews HD/2017-08-15
0.006:  /tv-shows/FoxNews HD/2017-08-16
0.006:  /tv-shows/FoxNews HD/2017-08-17
0.006:  /tv-shows/FoxNews HD/2017-08-11
0.006:  /tv-shows/FoxNews HD/2017-07-09
0.006:  /tv-shows/FoxNews HD/2017-06-30
0.006:  /tv-shows/FoxNews HD/2017-06-23
0.006:  /tv-shows/FoxNews HD/2017-07-06
0.006:  /tv-shows/FoxNews HD/2017-07-07
0.006:  /tv-shows/FoxNews HD/2017-07-05
0.006:  /tv-shows/FoxNews HD/2017-09-02
0.006:  /tv-shows/FoxNews HD/2017-07-27
0.006:  /tv-shows/FoxNews HD/2017-07-03
0.006:  /tv-shows/FoxNews HD/2017-07-24
0.006:  /tv-shows/FoxNews HD/2017-08-19
0.006:  /tv-shows/FoxNews HD/2017-08-29
0.006:  /tv-shows/FoxNews HD/2017-07-19
0.006:  /tv-shows/FoxNews HD/2017-07-26
0.006:  /tv-shows/FoxNews HD/2017-08-09
0.006:  /tv-shows/FoxNews HD/2017-08-06
0.006:  /tv-shows/FoxNews HD/2017-08-23
0.006:  /tv-shows/FoxNews HD/2017-08-04
0.006:  /tv-shows/FoxNews HD/2017-08-03
0.006:  /tv-shows/FoxNews HD/2017-08-02
0.006:  /tv-shows/FoxNews HD/2017-08-01
0.006:  /tv-shows/FoxNews HD/2017-08-26
0.006:  /tv-shows/FoxNews HD/2017-09-01
0.006:  /tv-shows/FoxNews HD/2017-06-25
0.006:  /tv-shows/FoxNews HD/2017-09-04
0.006:  /tv-shows/FoxNews HD/2017-07-18
0.006:  /tv-shows/FoxNews HD/2017-07-16
0.006:  /tv-shows/FoxNews HD/2017-07-10
0.006:  /tv-shows/FoxNews HD/2017-07-13
0.006:  /tv-shows/FoxNews HD/2017-07-12
------------ 2 -----------
# Bucket size: 0.214
0.051:  /projects/Justin-Bieber-Oh-Canada/Assets
0.006:  /tv-shows/FoxNews HD/2017-09-12
0.006:  /tv-shows/FoxNews HD/2017-09-11
0.006:  /tv-shows/FoxNews HD/2017-09-10
0.006:  /tv-shows/FoxNews HD/2017-09-17
0.006:  /tv-shows/FoxNews HD/2017-09-16
0.006:  /tv-shows/FoxNews HD/2017-09-15
0.006:  /tv-shows/FoxNews HD/2017-09-14
0.006:  /tv-shows/FoxNews HD/2017-09-19
0.006:  /tv-shows/FoxNews HD/2017-09-18
0.006:  /tv-shows/FoxNews HD/2017-09-22
0.006:  /tv-shows/FoxNews HD/2017-09-23
0.006:  /tv-shows/FoxNews HD/2017-09-21
0.006:  /tv-shows/FoxNews HD/2017-09-26
0.006:  /tv-shows/FoxNews HD/2017-09-24
0.006:  /tv-shows/FoxNews HD/2017-08-28
0.006:  /tv-shows/FoxNews HD/2017-07-31
0.006:  /tv-shows/FoxNews HD/2017-08-21
0.006:  /tv-shows/FoxNews HD/2017-08-05
0.006:  /tv-shows/FoxNews HD/2017-08-25
0.006:  /tv-shows/FoxNews HD/2017-06-27
0.006:  /tv-shows/FoxNews HD/2017-08-07
0.006:  /tv-shows/FoxNews HD/2017-09-05
0.006:  /tv-shows/FoxNews HD/2017-09-06
0.006:  /tv-shows/FoxNews HD/2017-09-07
0.006:  /tv-shows/FoxNews HD/2017-09-08
0.006:  /tv-shows/FoxNews HD/2017-09-09
0.006:  /tv-shows/FoxNews HD/2017-07-17
0.006:  /tv-shows/FoxNews HD/2017-06-29
0.006:  /tv-shows/FoxNews HD/2017-06-28
------------ 3 -----------
# Bucket size: 0.293
0.108:  /projects/Katy-Perry-Taylor-Swift-Rihanna-Beyonce-Collab/Elemental
0.013:  /projects/Justin-Bieber-Oh-Canada
0.106:  /query_db/base/16385
0.006:  /
0.054:  /query_db/base/44991544
    : -/projects/Katy-Perry-Taylor-Swift-Rihanna-Beyonce-Collab/flame/4KSTEM_DEC
    : -/tv-shows/FoxNews HD/2017-07-20
    : -/tv-shows/FoxNews HD/2017-07-21
    : -/tv-shows/FoxNews HD/2017-07-22
    : -/tv-shows/FoxNews HD/2017-07-23
    : -/tv-shows/FoxNews HD/2017-07-24
    : -/tv-shows/FoxNews HD/2017-07-25
    : -/tv-shows/FoxNews HD/2017-08-30
    : -/tv-shows/FoxNews HD/2017-08-31
    : -/tv-shows/FoxNews HD/2017-07-28
    : -/tv-shows/FoxNews HD/2017-07-29
    : -/tv-shows/FoxNews HD/2017-09-11
    : -/tv-shows/FoxNews HD/2017-07-09
    : -/tv-shows/FoxNews HD/2017-06-30
    : -/tv-shows/FoxNews HD/2017-09-18
    : -/projects/Katy-Perry-Taylor-Swift-Rihanna-Beyonce-Collab/flame/4KSTEM_ENC
    : -/tv-shows/FoxNews HD/2017-07-19
    : -/tv-shows/FoxNews HD/2017-07-18
    : -/tv-shows/FoxNews HD/2017-07-15
    : -/tv-shows/FoxNews HD/2017-07-14
    : -/tv-shows/FoxNews HD/2017-07-17
    : -/tv-shows/FoxNews HD/2017-07-16
    : -/tv-shows/FoxNews HD/2017-07-11
    : -/tv-shows/FoxNews HD/2017-07-10
    : -/tv-shows/FoxNews HD/2017-07-13
    : -/tv-shows/FoxNews HD/2017-07-12
    : -/tv-shows/FoxNews HD/2017-09-13
    : -/tv-shows/FoxNews HD/2017-09-12
    : -/tv-shows/FoxNews HD/2017-07-08
    : -/tv-shows/FoxNews HD/2017-09-10
    : -/tv-shows/FoxNews HD/2017-09-17
    : -/tv-shows/FoxNews HD/2017-09-16
    : -/tv-shows/FoxNews HD/2017-09-15
    : -/tv-shows/FoxNews HD/2017-09-14
    : -/tv-shows/FoxNews HD/2017-07-02
    : -/tv-shows/FoxNews HD/2017-07-03
    : -/tv-shows/FoxNews HD/2017-09-19
    : -/tv-shows/FoxNews HD/2017-07-01
    : -/tv-shows/FoxNews HD/2017-07-06
    : -/tv-shows/FoxNews HD/2017-07-07
    : -/tv-shows/FoxNews HD/2017-07-04
    : -/tv-shows/FoxNews HD/2017-07-05
    : -/projects/Justin-Bieber-Oh-Canada/Assets
    : -/tv-shows/FoxNews HD/2017-07-26
    : -/tv-shows/FoxNews HD/2017-07-27
    : -/tv-shows/FoxNews HD/2017-09-01
    : -/tv-shows/FoxNews HD/2017-09-02
    : -/tv-shows/FoxNews HD/2017-09-03
    : -/tv-shows/FoxNews HD/2017-09-04
    : -/tv-shows/FoxNews HD/2017-09-05
    : -/tv-shows/FoxNews HD/2017-09-06
    : -/tv-shows/FoxNews HD/2017-09-07
    : -/tv-shows/FoxNews HD/2017-09-08
    : -/tv-shows/FoxNews HD/2017-09-09
    : -/tv-shows/FoxNews HD/2017-09-22
    : -/tv-shows/FoxNews HD/2017-09-23
    : -/tv-shows/FoxNews HD/2017-09-20
    : -/tv-shows/FoxNews HD/2017-09-21
    : -/tv-shows/FoxNews HD/2017-09-26
    : -/tv-shows/FoxNews HD/2017-09-24
    : -/tv-shows/FoxNews HD/2017-09-25
    : -/projects/Katy-Perry-Taylor-Swift-Rihanna-Beyonce-Collab/pixspan/media-uncompressed/4KStem-D
    : -/tv-shows/FoxNews HD/2017-08-09
    : -/tv-shows/FoxNews HD/2017-08-08
    : -/tv-shows/FoxNews HD/2017-08-07
    : -/tv-shows/FoxNews HD/2017-08-06
    : -/tv-shows/FoxNews HD/2017-08-05
    : -/tv-shows/FoxNews HD/2017-08-04
    : -/tv-shows/FoxNews HD/2017-08-03
    : -/tv-shows/FoxNews HD/2017-08-02
    : -/tv-shows/FoxNews HD/2017-08-01
    : -/projects/Katy-Perry-Taylor-Swift-Rihanna-Beyonce-Collab/flame/4KSTEM_ENC2
    : -/tv-shows/FoxNews HD/2017-08-18
    : -/tv-shows/FoxNews HD/2017-08-19
    : -/tv-shows/FoxNews HD/2017-08-14
    : -/tv-shows/FoxNews HD/2017-08-15
    : -/tv-shows/FoxNews HD/2017-08-16
    : -/tv-shows/FoxNews HD/2017-08-17
    : -/tv-shows/FoxNews HD/2017-08-10
    : -/tv-shows/FoxNews HD/2017-08-11
    : -/tv-shows/FoxNews HD/2017-08-12
    : -/tv-shows/FoxNews HD/2017-08-13
    : -/projects/Katy-Perry-Taylor-Swift-Rihanna-Beyonce-Collab/pixspan
    : -/tv-shows/FoxNews HD/2017-08-29
    : -/tv-shows/FoxNews HD/2017-08-28
    : -/tv-shows/FoxNews HD/2017-07-31
    : -/tv-shows/FoxNews HD/2017-07-30
    : -/tv-shows/FoxNews HD/2017-08-21
    : -/tv-shows/FoxNews HD/2017-08-20
    : -/tv-shows/FoxNews HD/2017-08-23
    : -/tv-shows/FoxNews HD/2017-08-22
    : -/tv-shows/FoxNews HD/2017-08-25
    : -/tv-shows/FoxNews HD/2017-08-24
    : -/tv-shows/FoxNews HD/2017-08-27
    : -/tv-shows/FoxNews HD/2017-08-26
    : -/tv-shows/FoxNews HD/2017-06-27
    : -/tv-shows/FoxNews HD/2017-06-26
    : -/tv-shows/FoxNews HD/2017-06-25
    : -/tv-shows/FoxNews HD/2017-06-24
    : -/tv-shows/FoxNews HD/2017-06-23
    : -/tv-shows/FoxNews HD/2017-06-29
    : -/tv-shows/FoxNews HD/2017-06-28

In [ ]: